/* lackfit.sas */
/*------------------<--- Start of Description -->--------------------\
| lfit.sas - Lack of Fit Analysis for SAS ANOVA procedure;           |
| What is a lack of fit analysis?                                    |
| When you run a regression or ANOVA procedure, you make some        |
| assumption about how the predictor variables relate to the         |
| response. For example, in simple linear regression the assumed     |
| relationship is EY = b0 + b1*x. The lack of fit analysis seeks to  |
| determine, if we restrict our model-building to the class of all   |
| linear models, whether our particular model is sufficient.         |
| It does so by comparing the sum of residual squares in our model to|
| the sum of residual squares in the most flexible linear model      |
| available--an ANOVA where all variables are considered categorical.|
| For this procedure to work, there must be at least one point with  |
| at least two measurements; the more such points the merrier.       |
| Invocation:                                                        |
| First, before calling the macro, perform a regression with PROC    |
| GLM, and use the OUTSTAT option. Then use the %lackfit macro to    |
| show a lack-of-fit table.                                          |
|--------------------<--- End of Description -->---------------------|
|--------------------------------------------------------------------|
|--------------<--- Start of Files or Arguments Needed -->-----------|
| Arguments needed:                                                  |
|   outstat=              The data set specified in the OUTSTAT=     |
|                         option in PROC GLM. This is required.      |
|   data=                 The original data set you wanted to        |
|                         analyze. Required.                         |
|   model=                The main effects model you want to compare |
|                         your regression to. I.e. no interactions   |
|                         or higher-order terms (they'll produce an  |
|                         error). Don't worry, you can still have    |
|                         such terms in your PROC GLM statement. Each|
|                         variable you list (separated by spaces) is |
|                         used as a categorical variable in the      |
|                         analysis. Required.                        |
|   response=             The response variable you analyzed in PROC |
|                         GLM. Required.                             |
|   title=                A quoted title for the lack-of-fit table.  |
|                         Not required, but has a default value.     |
|   footnote=             A footnote you can give to your table.     |
|                         Not required.                              |
|   alpha=<.05>           Determines the level of significance.      |
|                         Not required.                              |
|                         Doesn't really affect anything except the  |
|                         statement "(Not) significant at alpha=..." |
|                         printed at the end.                        |
| Note: Do not name any of your datasets work._tmp_, work._tmp1_,    |
|       work._tmp2_, work._tmp3_, work._tmp4_, or work._tmp5_, as    |
|       these will get clobbered. It's not a good idea to use        |
|       underscores surrounding datasets anyway, as sicko SAS macro- |
|       writers like me tend to use them with impunity for some      |
|       arcane internal function.                                    |
|---------------<--- End of Files or Arguments Needed -->------------|
|--------------------------------------------------------------------|
|----------------<--- Start of Example and Usage -->-----------------|
| Example:                                                           |
| data foo;                                                          |
|    input x y;                                                      |
|    cards;                                                          |
| 1 2                                                                |
| 1 3                                                                |
| 2 7                                                                |
| 3 9                                                                |
| 3 7                                                                |
| 4 3                                                                |
| 5 13                                                               |
| 5 10                                                               |
| ;                                                                  |
| proc glm outstat=stats;                                            |
|     model y=x;                                                     |
| quit;                                                              |
| %lackfit(outstat=stats,data=foo,model=x,response=y,                |
|          title='Lack of fit of foo data');                         |
| Usage: %lackfit(outstat=, data=, model=, response=y, alpha=.05,    |
|                 footnote=, title='Lack of Fit Analysis');          |
\-------------------<--- End of Example and Usage -->---------------*/
%macro lackfit(outstat=, data=, model=, response=y, alpha=.05, 
               footnote=, title='Lack of Fit Analysis');
/*--------------------------------------------------------------------\
| Author:  John D. Johnson;                                           |
| Created: 1998;                                                      |
| Purpose: Lack of Fit Analysis;                                      |
|                                                                     |
| Splits the error sum of squares found in the OUTSTAT= data set into |
| a pure-error part (within-cell corrected SS of the response, cells  |
| being the distinct combinations of the MODEL= variables) and a      |
| between-cell (lack of fit) part, then prints an F test table.       |
|                                                                     |
| Parameters:                                                         |
|   outstat=  data set read for its _TYPE_ = ERROR row (DF and SS).   |
|   data=     data set holding the raw observations.                  |
|   model=    space-separated variables used as BY variables.         |
|   response= analysis variable (default y).                          |
|   alpha=    level the p-value is compared with in the closing       |
|             message (default .05).                                  |
|   title=    inserted verbatim into a PUT statement, so it must be   |
|             a quoted string.                                        |
|   footnote= inserted verbatim into a PUT statement; quoted string   |
|             or blank.                                               |
|                                                                     |
| Side effects: NOTES is switched off while the steps run and on      |
| again afterwards; the title is reset to blank; the data sets        |
| _tmp_, _tmp1_ ... _tmp5_ are overwritten and then deleted.          |
\--------------------------------------------------------------------*/

/* Validate the required arguments first, before any option is      */
/* changed, so that an early exit leaves the session untouched.     */
%if (%length(&model) = 0) %then %do;
   %put ERROR: LFIT.SAS: MODEL= is required;
   %goto exit;
%end;

%if (%length(&outstat) = 0) %then %do;
   %put ERROR: LFIT.SAS: OUTSTAT= is required;
   %goto exit;
%end;

%if (%length(&data) = 0) %then %do;
   %put ERROR: LFIT.SAS: DATA= is required;
   %goto exit;
%end;

options nonotes;

/* Sort a copy so the input data set is not reordered in place. */
proc sort data=&data out=_tmp1_;
   by &model;
run;

/* One row per cell: number of nonmissing responses and the         */
/* corrected sum of squares within the cell.                        */
proc means noprint data=_tmp1_;
   by &model;
   var &response;
   output out = _tmp_
          n = dferror
          css = sspe;
run;

/* Each cell contributes n-1 pure-error degrees of freedom. A cell  */
/* with no nonmissing response contributes none, hence the floor.   */
data _tmp_;
   set _tmp_;
   dferror = max(dferror-1, 0);
run;

/* Total pure-error sum of squares over all cells. */
proc means noprint data=_tmp_;
   var sspe;
   output out = _tmp2_
          sum = sspe;
run;

/* Total pure-error degrees of freedom over all cells. */
proc means noprint data=_tmp_;
   var dferror;
   output out = _tmp3_
          sum = dferror;
run;

/* Keep only the error row of the OUTSTAT= data set. Its character  */
/* _type_ is dropped because the PROC MEANS outputs merged below    */
/* carry their own _type_ variable.                                 */
data _tmp4_;
   set &outstat;
   if _type_ = 'ERROR';
   drop _type_;
run;

data _tmp5_;
   merge _tmp4_ _tmp2_ _tmp3_;
   /* Between-cell (lack of fit) = total error - pure error. */
   ssbc = ss-sspe;
   dfbc = df-dferror;
   /* Guard every division: with no replicates or no spare degrees  */
   /* of freedom the statistics stay missing and the message below  */
   /* reports that the analysis could not be performed.             */
   if dfbc > 0 then msbc = ssbc/dfbc;
   if dferror > 0 then mspe = sspe/dferror;
   if msbc ne . and mspe > 0 then fbc = msbc/mspe;
   /* A missing value compares below zero, so this also skips       */
   /* a missing F.                                                  */
   if fbc >= 0 then pf = 1- probf(fbc,dfbc,dferror);
   file print;
   put;
   put;
   put &Title;
   put;
   put 'Source       df  SS          MS          F       Prob>F';
   put '-------------------------------------------------------';
   put 'Between Cell ' @14 dfbc 3.0 @18 ssbc 9.4 @30 msbc 9.4 @42 fbc 6.3 @50 pf 5.4;
   put 'Pure Error   ' @14 dferror 3.0 @18 sspe 9.4 @30 mspe 9.4;
   put '-------------------------------------------------------';
   put 'Total Error   ' @14 df 3.0 @18 ss 9.4;
   put;
   put &footnote;
   put;
   if (pf=.) then put "A lack of fit analysis could not be performed. Check for repeated measures.";
   else if (pf<&alpha) then put "There is a significant lack of fit at alpha=&alpha";
   else if (pf>=&alpha) then put "There is no significant lack of fit at alpha=&alpha";
   else put "Something bizarre just occurred. Do not trust the results of this analysis.";
run;

/* Remove the scratch data sets; NOLIST keeps the directory listing */
/* out of the log.                                                  */
proc datasets nolist;
   delete _tmp_ _tmp1_ _tmp2_ _tmp3_ _tmp4_ _tmp5_;
run;quit;
title " " ;
options notes;
%exit:
%mend lackfit;